library(here)
## here() starts at /Users/adelheid/Documents/MEDS/EDS_222/eds222_final_project
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tmap)
library(lfe)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
I used data from two sources: California Department of fish and wildlife and California Protected areas database.
salmon_populations <- read_csv(here("data/Salmonid_Population_Monitoring_Data_CMPv2021.csv"))
## New names:
## Rows: 3918 Columns: 26
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (18): Watershed, Population, Species, Life Stage, Origin, Run designatio... dbl
## (4): ID, CDFW region, GEO_ID_POLY, GEO_ID_PT num (3): Value, X95 lower CI, X95
## upper CI lgl (1): ...26
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...26`
watershed <- st_read(here("data/ds3001/ds3001.gdb"))
## Multiple layers are present in data source /Users/adelheid/Documents/MEDS/EDS_222/eds222_final_project/data/ds3001/ds3001.gdb, reading layer `ds3001'.
## Use `st_layers' to list all layer names and their type in a data source.
## Set the `layer' argument in `st_read' to read a particular layer.
## Warning in evalq((function (..., call. = TRUE, immediate. = FALSE, noBreaks. =
## FALSE, : automatically selected the first layer in a data source containing more
## than one.
## Reading layer `ds3001' from data source
## `/Users/adelheid/Documents/MEDS/EDS_222/eds222_final_project/data/ds3001/ds3001.gdb'
## using driver `OpenFileGDB'
## Simple feature collection with 157 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -370211.5 ymin: -445468.7 xmax: 179047.7 ymax: 465217.3
## Projected CRS: NAD83 / California Albers
protected_areas <- st_read(here("data/CPAD_2022a/CPAD_2022a_Holdings.dbf"))
## Reading layer `CPAD_2022a_Holdings' from data source
## `/Users/adelheid/Documents/MEDS/EDS_222/eds222_final_project/data/CPAD_2022a/CPAD_2022a_Holdings.dbf'
## using driver `ESRI Shapefile'
## Simple feature collection with 94534 features and 35 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -374984.2 ymin: -604454.8 xmax: 540016.3 ymax: 449743.2
## Projected CRS: NAD83 / California Albers
In this section I refine the data, for this study I am interested in looking at adult population counts, so I select data on Adults.
Not every watershed polygon provided by the CMP has count data for it. I am taking out polygons with no adult data associated. There are 157 polygons of watersheds, I have adult counts for 110 of them.
spawning_data <- salmon_populations |> filter(`Life Stage` %in% "Adult") |> #all adult salmon
select("Population", "Watershed", "Species", Brood_year = "Brood Year", "GEO_ID_POLY", "Value", "Metric", "Estimation method") |> #selecting relevant columns
filter(!is.na(GEO_ID_POLY)) #taking out data with no matching spatial id
watershed_id <- unique(spawning_data$GEO_ID_POLY) #making a list of all the watersheds that have adult population data
watershed_new <- watershed |> filter(GEO_ID_POL %in% watershed_id) |> st_make_valid() #filter to watersheds that have spawning data available
protected <- protected_areas |> select(UNIT_NAME, YR_EST) |> st_make_valid()
#selecting relevant columns
protected2 <- protected |> filter(YR_EST < 1981| YR_EST %in% c(NA)) #remove after 1981
#Filter to protected areas within watersheds of interest and find the area of intersection for each
intersect_polygons <- st_intersection(protected2, watershed_new) |>
dplyr::select(Name, GEO_ID_POL) #select relevant columns
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
#find total area protected for each watershed
total_overlap <- intersect_polygons |> group_by(Name) |> #group by watershed
summarize(geometry = st_union(geometry))|> #combine geometries within watershed
mutate(total_protected = st_area(geometry)) #find total protected
# dropping geometry
total_overlap_geomless <- total_overlap |> st_drop_geometry()
watershed_area <- watershed_new |> mutate(total_area = st_area(Shape)) #find the total area of each watershed
watershed_protected <- left_join(watershed_area, total_overlap_geomless, by = "Name") #add area protected column by joining
#calculate percent protected
watershed_final <- watershed_protected |>
mutate(percent_protected =
as.numeric((total_protected)/ as.numeric(total_area)) *100) |>
mutate(percent_protected = round(percent_protected, digits = 0)) |> #round
mutate(percent_protected = replace_na(percent_protected, 0)) #change NA to 0
#drop geometry and make it a data frame
watershed_geomless <- st_drop_geometry(watershed_final) |> as.data.frame() |>
select(- Method_Typ)
#combine spawning observations with percent protected
all_data <- left_join(spawning_data, watershed_geomless, by = c("GEO_ID_POLY" = "GEO_ID_POL")) |> select("Population", "Species", "Value", "percent_protected", "Name", "Brood_year", "Metric", "Estimation method")
tmap_mode("view")
## tmap mode set to interactive viewing
tmap_options(check.and.fix = TRUE)
tm_shape(total_overlap) + tm_fill(col = "#004600") + #map protected portions
tm_shape(watershed_new) + tm_borders(col = "blue") + tm_add_legend(labels = c( "Watershed Boundary", "Protected area"), col = c("blue", "#004600")) #map watershed boundaries
## Warning: The shape total_overlap is invalid (after reprojection). See
## sf::st_is_valid